/*- * See the file LICENSE for redistribution information. * * Copyright (c) 2002-2006 * Sleepycat Software. All rights reserved. * * $Id: FileManager.java,v 1.1 2006/05/06 09:00:04 ckaestne Exp $ */ package com.sleepycat.je.log; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.ClosedChannelException; import java.nio.channels.FileChannel; import java.nio.channels.FileLock; import java.nio.channels.OverlappingFileLockException; import java.util.Arrays; import java.util.HashMap; import java.util.Hashtable; import java.util.Iterator; import java.util.LinkedList; import java.util.Map; import java.util.Random; import java.util.Set; import java.util.zip.Checksum; import com.sleepycat.je.DatabaseException; import com.sleepycat.je.EnvironmentStats; import com.sleepycat.je.RunRecoveryException; import com.sleepycat.je.StatsConfig; import com.sleepycat.je.config.EnvironmentParams; import com.sleepycat.je.dbi.DbConfigManager; import com.sleepycat.je.dbi.EnvironmentImpl; import com.sleepycat.je.latch.Latch; import com.sleepycat.je.latch.LatchSupport; import com.sleepycat.je.log.entry.LogEntry; import com.sleepycat.je.utilint.Adler32; import com.sleepycat.je.utilint.DbLsn; import com.sleepycat.je.utilint.HexFormatter; /** * The FileManager presents the abstraction of one contiguous file. It doles out * LSNs. */ public class FileManager { public static class FileMode { public static final FileMode READ_MODE = new FileMode("r"); public static final FileMode READWRITE_MODE = new FileMode("rw"); private String fileModeValue; private FileMode(String fileModeValue) { this.fileModeValue = fileModeValue; } public String getModeValue() { return fileModeValue; } } static boolean IO_EXCEPTION_TESTING = false; private static final String DEBUG_NAME = FileManager.class.getName(); /* The number of writes that have been performed. */ private static long writeCount = 0; private static long stopOnWriteCount = Long.MAX_VALUE; public static final String JE_SUFFIX = ".jdb"; // regular log files public static final String CIF_SUFFIX = ".cif"; // cleaner info files public static final String DEL_SUFFIX = ".del"; // cleaned files public static final String BAD_SUFFIX = ".bad"; // corrupt files public static final String LOCK_SUFFIX = ".lck"; // lock files static final String[] DEL_SUFFIXES = { DEL_SUFFIX }; static final String[] JE_SUFFIXES = { JE_SUFFIX }; private static final String[] JE_AND_DEL_SUFFIXES = { JE_SUFFIX, DEL_SUFFIX }; /* May be set to false to speed unit tests. */ private boolean syncAtFileEnd = true; private EnvironmentImpl envImpl; private long maxFileSize; private File dbEnvHome; /* True if .del files should be included in the list of log files. */ private boolean includeDeletedFiles = false; /* File cache */ private FileCache fileCache; private Latch fileCacheLatch; /* The channel and lock for the je.lck file. */ private RandomAccessFile lockFile; private FileChannel channel; private FileLock envLock; private FileLock exclLock; /* True if all files should be opened readonly. */ private boolean readOnly; /* Handles onto log position */ private long currentFileNum; // number of the current file private long nextAvailableLsn; // nextLSN is the next one available private long lastUsedLsn; // last LSN used in the current log file private long prevOffset; // Offset to use for the previous pointer private boolean forceNewFile; // Force new file on next write /* * Saved versions of above. 
Save this in case a write causes an IOException, * we can back the log up to the last known good LSN. */ private long savedCurrentFileNum; private long savedNextAvailableLsn; // nextLSN is the next one available private long savedLastUsedLsn; // last LSN used in the current log file private long savedPrevOffset; // Offset to use for the previous pointer private boolean savedForceNewFile; /* endOfLog is used for writes and fsyncs to the end of the log. */ private LogEndFileDescriptor endOfLog; /* group commit sync */ private FSyncManager syncManager; /* * When we bump the LSNs over to a new file, we must remember the last LSN * of the previous file so we can set the prevOffset field of the file * header appropriately. We have to save it in a map because there's a time * lag between when we know what the last LSN is and when we actually do the * file write, because LSN bumping is done before we get a write buffer. * This map is keyed by file num->last LSN. */ private Map perFileLastUsedLsn; /* * If non-0, do NIO in chunks of this size. */ private long chunkedNIOSize = 0; /** * Set up the file cache and initialize the file manager to point to the * beginning of the log. * * @param configManager * @param dbEnvHome * environment home directory */ public FileManager(EnvironmentImpl envImpl, File dbEnvHome, boolean readOnly) throws DatabaseException { this.envImpl = envImpl; this.dbEnvHome = dbEnvHome; this.readOnly = readOnly; /* Read configurations. */ DbConfigManager configManager = envImpl.getConfigManager(); maxFileSize = configManager.getLong(EnvironmentParams.LOG_FILE_MAX); chunkedNIOSize = configManager .getLong(EnvironmentParams.LOG_CHUNKED_NIO); lockEnvironment(readOnly, false); /* Cache of files. */ fileCache = new FileCache(configManager); fileCacheLatch = LatchSupport.makeLatch(DEBUG_NAME + "_fileCache", envImpl); if (!dbEnvHome.exists()) { throw new LogException("Environment home " + dbEnvHome + " doesn't exist"); } /* Start out as if no log existed. */ currentFileNum = 0L; nextAvailableLsn = DbLsn.makeLsn(currentFileNum, firstLogEntryOffset()); lastUsedLsn = DbLsn.NULL_LSN; perFileLastUsedLsn = new HashMap(); prevOffset = 0L; endOfLog = new LogEndFileDescriptor(); forceNewFile = false; saveLastPosition(); String stopOnWriteProp = System.getProperty("je.debug.stopOnWrite"); if (stopOnWriteProp != null) { stopOnWriteCount = Long.parseLong(stopOnWriteProp); } syncManager = new FSyncManager(envImpl); } /** * Set the file manager's "end of log". * * @param nextAvailableLsn * LSN to be used for the next log entry * @param lastUsedLsn * last LSN to have a valid entry, may be null * @param prevOffset * value to use for the prevOffset of the next entry. If the * beginning of the file, this is 0. */ public void setLastPosition(long nextAvailableLsn, long lastUsedLsn, long prevOffset) { this.lastUsedLsn = lastUsedLsn; perFileLastUsedLsn.put(new Long(DbLsn.getFileNumber(lastUsedLsn)), new Long(lastUsedLsn)); this.nextAvailableLsn = nextAvailableLsn; currentFileNum = DbLsn.getFileNumber(this.nextAvailableLsn); this.prevOffset = prevOffset; saveLastPosition(); } /* * Cause the current LSN state to be saved in case we fail after we have * bumped the lsn pointer but before we've successfully marshalled into the * log buffer. 
*/ void saveLastPosition() { savedNextAvailableLsn = nextAvailableLsn; savedLastUsedLsn = lastUsedLsn; savedPrevOffset = prevOffset; savedForceNewFile = forceNewFile; savedCurrentFileNum = currentFileNum; } void restoreLastPosition() { nextAvailableLsn = savedNextAvailableLsn; lastUsedLsn = savedLastUsedLsn; prevOffset = savedPrevOffset; forceNewFile = savedForceNewFile; currentFileNum = savedCurrentFileNum; } /** * May be used to disable sync at file end to speed unit tests. Must only be * used for unit testing, since log corruption may result. */ public void setSyncAtFileEnd(boolean sync) { syncAtFileEnd = sync; } /* * File management */ /** * public for cleaner. * * @return the number of the first file in this environment. */ public Long getFirstFileNum() { return getFileNum(true); } public boolean getReadOnly() { return readOnly; } /** * @return the number of the last file in this environment. */ public Long getLastFileNum() { return getFileNum(false); } /* * For unit tests. */ public long getCurrentFileNum() { return currentFileNum; } public void setIncludeDeletedFiles(boolean includeDeletedFiles) { this.includeDeletedFiles = includeDeletedFiles; } /** * Get all JE file numbers. * * @return an array of all JE file numbers. */ public Long[] getAllFileNumbers() { /* Get all the names in sorted order. */ String[] names = listFiles(JE_SUFFIXES); Long[] nums = new Long[names.length]; for (int i = 0; i < nums.length; i += 1) { nums[i] = getNumFromName(names[i]); } return nums; } /** * Get the next file number before/after currentFileNum. * * @param currentFileNum * the file we're at right now. Note that it may not exist, if * it's been cleaned and renamed. * @param forward * if true, we want the next larger file, if false we want the * previous file * @return null if there is no following file, or if filenum doesn't exist */ public Long getFollowingFileNum(long currentFileNum, boolean forward) { /* Get all the names in sorted order. */ String[] names = listFiles(JE_SUFFIXES); /* Search for the current file. */ String searchName = getFileName(currentFileNum, JE_SUFFIX); int foundIdx = Arrays.binarySearch(names, searchName); boolean foundTarget = false; if (foundIdx >= 0) { if (forward) { foundIdx++; } else { foundIdx--; } } else { /* * currentFileNum not found (might have been cleaned). FoundIdx will * be (-insertionPoint - 1). */ foundIdx = Math.abs(foundIdx + 1); if (!forward) { foundIdx--; } } /* The current fileNum is found, return the next or prev file. */ if (forward && (foundIdx < names.length)) { foundTarget = true; } else if (!forward && (foundIdx > -1)) { foundTarget = true; } if (foundTarget) { return getNumFromName(names[foundIdx]); } else { return null; } } /** * @return true if there are any files at all. */ public boolean filesExist() { String[] names = listFiles(JE_SUFFIXES); return (names.length != 0); } /** * Get the first or last file number in the set of je files. * * @param first * if true, get the first file, else get the last file * @return the file number or null if no files exist */ private Long getFileNum(boolean first) { String[] names = listFiles(JE_SUFFIXES); if (names.length == 0) { return null; } else { int index = 0; if (!first) { index = names.length - 1; } return getNumFromName(names[index]); } } /** * Get the file number from a file name. 
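	 * For example (illustrative), a file named "0000001a.jdb" yields file
	 * number 0x1a (26 decimal), since the prefix before the "." is parsed as
	 * hex.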
	 *
	 * @param fileName
	 *            the file name
	 * @return the file number
	 */
	private Long getNumFromName(String fileName) {
		String fileNumber = fileName.substring(0, fileName.indexOf("."));
		return new Long(Long.parseLong(fileNumber, 16));
	}

	/**
	 * Find je files. Return names sorted in ascending fashion.
	 *
	 * @param suffixes
	 *            which types of files we're looking for
	 * @return array of file names
	 */
	String[] listFiles(String[] suffixes) {
		String[] fileNames = dbEnvHome.list(new JEFileFilter(suffixes));
		Arrays.sort(fileNames);
		return fileNames;
	}

	/**
	 * Find je files, a flavor for unit test support.
	 *
	 * @param suffixes
	 *            which types of files we're looking for
	 * @return array of file names
	 */
	public static String[] listFiles(File envDirFile, String[] suffixes) {
		String[] fileNames = envDirFile.list(new JEFileFilter(suffixes));
		Arrays.sort(fileNames);
		return fileNames;
	}

	/**
	 * @return the full file name and path for the nth je file.
	 */
	String[] getFullFileNames(long fileNum) {
		if (includeDeletedFiles) {
			int nSuffixes = JE_AND_DEL_SUFFIXES.length;
			String[] ret = new String[nSuffixes];
			for (int i = 0; i < nSuffixes; i++) {
				ret[i] = getFullFileName(getFileName(fileNum,
						JE_AND_DEL_SUFFIXES[i]));
			}
			return ret;
		} else {
			return new String[] { getFullFileName(getFileName(fileNum,
					JE_SUFFIX)) };
		}
	}

	/**
	 * @return the full file name and path for the given file number and
	 *         suffix.
	 */
	public String getFullFileName(long fileNum, String suffix) {
		return getFullFileName(getFileName(fileNum, suffix));
	}

	/**
	 * @return the full file name and path for this file name.
	 */
	private String getFullFileName(String fileName) {
		return dbEnvHome + File.separator + fileName;
	}

	/**
	 * @return the file name for the nth file.
	 */
	public static String getFileName(long fileNum, String suffix) {
		/*
		 * HexFormatter generates a 0 padded string starting with 0x. We want
		 * the right most 8 digits, so start at 10.
		 */
		return (HexFormatter.formatLong(fileNum).substring(10) + suffix);
	}

	/**
	 * Rename this file to NNNNNNNN.suffix. If that file already exists, try
	 * NNNNNNNN.suffix.1, etc. Used for deleting files or moving corrupt files
	 * aside.
	 *
	 * @param fileNum
	 *            the file we want to move
	 * @param newSuffix
	 *            the new file suffix
	 */
	public void renameFile(long fileNum, String newSuffix)
			throws DatabaseException, IOException {
		int repeatNum = 0;
		boolean renamed = false;
		while (!renamed) {
			String generation = "";
			if (repeatNum > 0) {
				generation = "." + repeatNum;
			}
			String newName = getFullFileName(getFileName(fileNum, newSuffix)
					+ generation);
			File targetFile = new File(newName);
			if (targetFile.exists()) {
				repeatNum++;
			} else {
				String oldFileName = getFullFileNames(fileNum)[0];
				clearFileCache(fileNum);
				File oldFile = new File(oldFileName);
				if (oldFile.renameTo(targetFile)) {
					renamed = true;
				} else {
					throw new LogException("Couldn't rename " + oldFileName
							+ " to " + newName);
				}
			}
		}
	}

	/**
	 * Delete log file NNNNNNNN.
	 *
	 * @param fileNum
	 *            the file we want to delete
	 */
	public void deleteFile(long fileNum) throws DatabaseException, IOException {
		String fileName = getFullFileNames(fileNum)[0];
		clearFileCache(fileNum);
		File file = new File(fileName);
		boolean done = file.delete();
		if (!done) {
			throw new LogException("Couldn't delete " + file);
		}
	}

	/**
	 * Return a read-only file handle that corresponds to this file number.
	 * Retrieve it from the cache or open it anew and validate the file header.
	 * This method takes a latch on this file, so that the file descriptor will
	 * be held in the cache as long as it's in use. When the user is done with
	 * the file, the latch must be released.
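	 * A typical calling pattern (an illustrative sketch, not a prescribed
	 * API) is:
	 * <pre>
	 *     FileHandle handle = fileManager.getFileHandle(fileNum);
	 *     try {
	 *         // ... read from handle.getFile() ...
	 *     } finally {
	 *         handle.release();
	 *     }
	 * </pre>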
* * @param fileNum * which file * @return the file handle for the existing or newly created file */ FileHandle getFileHandle(long fileNum) throws LogException, DatabaseException { /* Check the file cache for this file. */ Long fileId = new Long(fileNum); FileHandle fileHandle = null; /** * Loop until we get an open FileHandle. */ while (true) { /* * The file cache is intentionally not latched here so that it's not * a bottleneck in the fast path. We check that the file handle that * we get back is really still open after we latch it down below. */ fileHandle = fileCache.get(fileId); /* The file wasn't in the cache. */ if (fileHandle == null) { fileCacheLatch.acquire(); try { /* Check the file cache again under the latch. */ fileHandle = fileCache.get(fileId); if (fileHandle == null) { fileHandle = makeFileHandle(fileNum, FileMode.READ_MODE); /* Put it into the cache. */ fileCache.add(fileId, fileHandle); } } finally { fileCacheLatch.release(); } } /* Get latch before returning */ fileHandle.latch(); /* * We may have obtained this file handle outside the file cache * latch, so we have to test that the handle is still valid. If it's * not, then loop back and try again. */ if (fileHandle.getFile() == null) { fileHandle.release(); } else { break; } } return fileHandle; } private FileHandle makeFileHandle(long fileNum, FileMode mode) throws DatabaseException { String[] fileNames = getFullFileNames(fileNum); RandomAccessFile newFile = null; String fileName = null; try { /* * Open the file. Note that we are going to try a few names to open * this file -- we'll try for N.jdb, and if that doesn't exist and * we're configured to look for all types, we'll look for N.del. */ FileNotFoundException FNFE = null; for (int i = 0; i < fileNames.length; i++) { fileName = fileNames[i]; try { newFile = new RandomAccessFile(fileName, mode .getModeValue()); break; } catch (FileNotFoundException e) { /* Save the first exception thrown. */ if (FNFE == null) { FNFE = e; } } } /* * If we didn't find the file or couldn't create it, rethrow the * exception. */ if (newFile == null) { throw FNFE; } boolean oldHeaderVersion = false; if (newFile.length() == 0) { /* * If the file is empty, reinitialize it if we can. If not, send * the file handle back up; the calling code will deal with the * fact that there's nothing there. */ if (mode == FileMode.READWRITE_MODE) { /* An empty file, write a header. */ long lastLsn = DbLsn.longToLsn((Long) perFileLastUsedLsn .remove(new Long(fileNum - 1))); long headerPrevOffset = 0; if (lastLsn != DbLsn.NULL_LSN) { headerPrevOffset = DbLsn.getFileOffset(lastLsn); } FileHeader fileHeader = new FileHeader(fileNum, headerPrevOffset); writeFileHeader(newFile, fileName, fileHeader); } } else { /* A non-empty file, check the header */ oldHeaderVersion = readAndValidateFileHeader(newFile, fileName, fileNum); } return new FileHandle(newFile, fileName, envImpl, oldHeaderVersion); } catch (FileNotFoundException e) { throw new LogFileNotFoundException("Couldn't open file " + fileName + ": " + e.getMessage()); } catch (DbChecksumException e) { /* * Let this exception go as a checksum exception, so it sets the run * recovery state correctly. */ closeFileInErrorCase(newFile); throw new DbChecksumException(envImpl, "Couldn't open file " + fileName, e); } catch (Throwable t) { /* * Catch Throwable here (rather than exception) because in unit test * mode, we run assertions and they throw errors. We want to clean * up the file object in all cases. 
*/ closeFileInErrorCase(newFile); throw new DatabaseException("Couldn't open file " + fileName + ": " + t, t); } } /** * Close this file and eat any exceptions. Used in catch clauses. */ private void closeFileInErrorCase(RandomAccessFile file) { try { if (file != null) { file.close(); } } catch (IOException e) { /* * Klockwork - ok Couldn't close file, oh well. */ } } /** * Read the given je log file and validate the header. * * @throws DatabaseException * if the file header isn't valid * * @return whether the file header has an old version number. */ private boolean readAndValidateFileHeader(RandomAccessFile file, String fileName, long fileNum) throws DatabaseException, IOException { /* * Read the file header from this file. It's always the first log entry. */ LogManager logManager = envImpl.getLogManager(); LogEntry headerEntry = logManager.getLogEntry( DbLsn.makeLsn(fileNum, 0), file); FileHeader header = (FileHeader) headerEntry.getMainItem(); return header.validate(fileName, fileNum); } /** * Write a proper file header to the given file. */ private void writeFileHeader(RandomAccessFile file, String fileName, FileHeader header) throws DatabaseException, IOException { /* * Fail loudly if the environment is invalid. A RunRecoveryException * must have occurred. */ envImpl.checkIfInvalid(); /* * Fail silent if the environment is not open. */ if (envImpl.mayNotWrite()) { return; } /* Serialize the header into this buffer. */ int headerSize = header.getLogSize(); int entrySize = headerSize + LogManager.HEADER_BYTES; ByteBuffer headerBuf = envImpl.getLogManager().putIntoBuffer(header, headerSize, 0, false, entrySize); if (++writeCount >= stopOnWriteCount) { Runtime.getRuntime().halt(0xff); } /* Write the buffer into the channel. */ int bytesWritten; try { if (RUNRECOVERY_EXCEPTION_TESTING) { generateRunRecoveryException(file, headerBuf, 0); } bytesWritten = writeToFile(file, headerBuf, 0); } catch (ClosedChannelException e) { /* * The channel should never be closed. It may be closed because of * an interrupt received by another thread. See SR [#10463] */ throw new RunRecoveryException(envImpl, "Channel closed, may be due to thread interrupt", e); } catch (IOException e) { /* Possibly an out of disk exception. */ throw new RunRecoveryException(envImpl, "IOException caught: " + e); } if (bytesWritten != entrySize) { throw new LogException("File " + fileName + " was created with an incomplete header. Only " + bytesWritten + " bytes were written."); } } /** * @return the prevOffset field stored in the file header. */ long getFileHeaderPrevOffset(long fileNum) throws IOException, DatabaseException { LogEntry headerEntry = envImpl.getLogManager().getLogEntry( DbLsn.makeLsn(fileNum, 0)); FileHeader header = (FileHeader) headerEntry.getMainItem(); return header.getLastEntryInPrevFileOffset(); } /* * Support for writing new log entries */ /** * @return the file offset of the last LSN that was used. For constructing * the headers of log entries. If the last LSN that was used was in * a previous file, or this is the very first LSN of the whole * system, return 0. */ long getPrevEntryOffset() { return prevOffset; } /** * Increase the current log position by "size" bytes. Move the prevOffset * pointer along. * * @param size * is an unsigned int * @return true if we flipped to the next log file. */ boolean bumpLsn(long size) { /* Save copy of initial lsn state. 
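	 * If the write that follows this bump fails with an IOException, the
	 * saved fields are expected to be put back via restoreLastPosition() (see
	 * the class comment on the saved* fields).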
*/ saveLastPosition(); boolean flippedFiles = false; if (forceNewFile || (DbLsn.getFileOffset(nextAvailableLsn) + size) > maxFileSize) { forceNewFile = false; /* Move to another file. */ currentFileNum++; /* Remember the last used LSN of the previous file. */ if (lastUsedLsn != DbLsn.NULL_LSN) { perFileLastUsedLsn.put(new Long(DbLsn .getFileNumber(lastUsedLsn)), new Long(lastUsedLsn)); } prevOffset = 0; lastUsedLsn = DbLsn.makeLsn(currentFileNum, firstLogEntryOffset()); flippedFiles = true; } else { if (lastUsedLsn == DbLsn.NULL_LSN) { prevOffset = 0; } else { prevOffset = DbLsn.getFileOffset(lastUsedLsn); } lastUsedLsn = nextAvailableLsn; } nextAvailableLsn = DbLsn.makeLsn(DbLsn.getFileNumber(lastUsedLsn), (DbLsn.getFileOffset(lastUsedLsn) + size)); return flippedFiles; } /** * Write out a log buffer to the file. * * @param fullBuffer * buffer to write */ void writeLogBuffer(LogBuffer fullBuffer) throws DatabaseException { /* * Fail loudly if the environment is invalid. A RunRecoveryException * must have occurred. */ envImpl.checkIfInvalid(); /* * Fail silent if the environment is not open. */ if (envImpl.mayNotWrite()) { return; } /* Use the LSN to figure out what file to write this buffer to. */ long firstLsn = fullBuffer.getFirstLsn(); /* * Is there anything in this write buffer? We could have been called by * the environment shutdown, and nothing is actually in the buffer. */ if (firstLsn != DbLsn.NULL_LSN) { RandomAccessFile file = endOfLog.getWritableFile(DbLsn .getFileNumber(firstLsn)); ByteBuffer data = fullBuffer.getDataBuffer(); if (++writeCount >= stopOnWriteCount) { Runtime.getRuntime().halt(0xff); } try { /* * Check that we do not overwrite unless the file only contains * a header [#11915] [#12616]. */ assert fullBuffer.getRewriteAllowed() || (DbLsn.getFileOffset(firstLsn) >= file.length() || file .length() == firstLogEntryOffset()) : "FileManager would overwrite non-empty file 0x" + Long.toHexString(DbLsn.getFileNumber(firstLsn)) + " lsnOffset=0x" + Long.toHexString(DbLsn.getFileOffset(firstLsn)) + " fileLength=0x" + Long.toHexString(file.length()); if (IO_EXCEPTION_TESTING) { throw new IOException("generated for testing"); } if (RUNRECOVERY_EXCEPTION_TESTING) { generateRunRecoveryException(file, data, DbLsn .getFileOffset(firstLsn)); } writeToFile(file, data, DbLsn.getFileOffset(firstLsn)); } catch (ClosedChannelException e) { /* * The file should never be closed. It may be closed because of * an interrupt received by another thread. See SR [#10463]. */ throw new RunRecoveryException(envImpl, "File closed, may be due to thread interrupt", e); } catch (IOException IOE) { /* * Possibly an out of disk exception, but java.io will only tell * us IOException with no indication of whether it's out of disk * or something else. * * Since we can't tell what sectors were actually written to * disk, we need to change any commit records that might have * made it out to disk to abort records. If they made it to disk * on the write, then rewriting should allow them to be * rewritten. See [11271]. */ abortCommittedTxns(data); try { if (IO_EXCEPTION_TESTING) { throw new IOException("generated for testing"); } writeToFile(file, data, DbLsn.getFileOffset(firstLsn)); } catch (IOException IOE2) { fullBuffer.setRewriteAllowed(); throw new DatabaseException(IOE2); } if (false) throw new DatabaseException(IOE); } assert EnvironmentImpl.maybeForceYield(); } } /** * Write a buffer to a file at a given offset, using NIO if so configured. 
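	 * When chunkedNIOSize is non-zero, the buffer is handed to
	 * FileChannel.write() in slices of at most chunkedNIOSize bytes. For
	 * example (illustrative), a 1 MB buffer with a 256 KB chunk size is
	 * submitted as four 256 KB slices, assuming each call writes its full
	 * chunk; the destination offset advances by the number of bytes actually
	 * written each time.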
*/ private int writeToFile(RandomAccessFile file, ByteBuffer data, long destOffset) throws IOException, DatabaseException { int totalBytesWritten = 0; // useNIO FileChannel channel = file.getChannel(); if (chunkedNIOSize > 0) { /* * We can't change the limit without impacting readers that might * find this buffer in the buffer pool. Duplicate the buffer so we * can set the limit independently. */ ByteBuffer useData = data.duplicate(); /* * Write small chunks of data by manipulating the position and limit * properties of the buffer, and submitting it for writing * repeatedly. * * For each chunk, the limit is set to the position + * chunkedNIOSize, capped by the original limit of the buffer. * * Preconditions: data to be written is betweek data.position() and * data.limit() * * Postconditions: data.limit() has not changed, data.position() == * data.limit(), offset of the channel has not been modified. */ int originalLimit = useData.limit(); useData.limit(useData.position()); while (useData.limit() < originalLimit) { useData.limit((int) (Math.min(useData.limit() + chunkedNIOSize, originalLimit))); int bytesWritten = channel.write(useData, destOffset); destOffset += bytesWritten; totalBytesWritten += bytesWritten; } } else { /* * Perform a single write using NIO. */ totalBytesWritten = channel.write(data, destOffset); } // !useNIO /* * Perform a RandomAccessFile write and update the buffer position. * ByteBuffer.array() is safe to use since all non-direct ByteBuffers * have a backing array. Synchronization on the file object is needed * because two threads may call seek() on the same file object. */ synchronized (file) { assert data.hasArray(); assert data.arrayOffset() == 0; int pos = data.position(); int size = data.limit() - pos; file.seek(destOffset); file.write(data.array(), pos, size); data.position(pos + size); totalBytesWritten = size; } return totalBytesWritten; } /** * Read a buffer from a file at a given offset, using NIO if so configured. */ void readFromFile(RandomAccessFile file, ByteBuffer readBuffer, long offset) throws IOException { // NIO FileChannel channel = file.getChannel(); if (chunkedNIOSize > 0) { /* * Read a chunk at a time to prevent large direct memory allocations * by NIO. */ int readLength = readBuffer.limit(); long currentPosition = offset; while (readBuffer.position() < readLength) { readBuffer.limit((int) (Math.min(readBuffer.limit() + chunkedNIOSize, readLength))); int bytesRead = channel.read(readBuffer, currentPosition); if (bytesRead < 1) break; currentPosition += bytesRead; } } else { /* * Perform a single read using NIO. */ channel.read(readBuffer, offset); } // !NIO /* * Perform a RandomAccessFile read and update the buffer position. * ByteBuffer.array() is safe to use since all non-direct ByteBuffers * have a backing array. Synchronization on the file object is needed * because two threads may call seek() on the same file object. */ synchronized (file) { assert readBuffer.hasArray(); assert readBuffer.arrayOffset() == 0; int pos = readBuffer.position(); int size = readBuffer.limit() - pos; file.seek(offset); int bytesRead = file.read(readBuffer.array(), pos, size); if (bytesRead > 0) { readBuffer.position(pos + bytesRead); } } } /* * Iterate through a buffer looking for commit records. Change all commit * records to abort records. 
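	 * Because the entry type byte is part of the checksummed log entry
	 * header, any entry whose type is flipped also has its checksum
	 * recomputed over the header (minus the checksum field itself) plus the
	 * item data.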
*/ private void abortCommittedTxns(ByteBuffer data) { final byte commitType = LogEntryType.LOG_TXN_COMMIT.getTypeNum(); final byte abortType = LogEntryType.LOG_TXN_ABORT.getTypeNum(); data.position(0); while (data.remaining() > 0) { int recStartPos = data.position(); data.position(recStartPos + LogManager.HEADER_ENTRY_TYPE_OFFSET); int typePos = data.position(); byte entryType = data.get(); boolean recomputeChecksum = false; if (entryType == commitType) { data.position(typePos); data.put(abortType); recomputeChecksum = true; } /* Move byte buffer past version. */ byte version = data.get(); /* Read the size, skipping over the prev offset. */ data.position(data.position() + LogManager.PREV_BYTES); int itemSize = LogUtils.readInt(data); int itemDataStartPos = data.position(); if (recomputeChecksum) { Checksum checksum = Adler32.makeChecksum(); data.position(recStartPos); /* Calculate the checksum and write it into the buffer. */ int nChecksumBytes = itemSize + (LogManager.HEADER_BYTES - LogManager.CHECKSUM_BYTES); byte[] checksumBytes = new byte[nChecksumBytes]; System.arraycopy(data.array(), recStartPos + LogManager.CHECKSUM_BYTES, checksumBytes, 0, nChecksumBytes); checksum.update(checksumBytes, 0, nChecksumBytes); LogUtils.writeUnsignedInt(data, checksum.getValue()); } data.position(itemDataStartPos + itemSize); } data.position(0); } /** * FSync the end of the log. */ void syncLogEnd() throws DatabaseException { try { endOfLog.force(); } catch (IOException e) { throw new DatabaseException(e); } } /** * Sync the end of the log, close off this log file. Should only be called * under the log write latch. */ void syncLogEndAndFinishFile() throws DatabaseException, IOException { if (syncAtFileEnd) { syncLogEnd(); } endOfLog.close(); } /** * Flush a file using the group sync mechanism, trying to amortize off other * syncs. */ void groupSync() throws DatabaseException { syncManager.fsync(); } /** * Close all file handles and empty the cache. */ public void clear() throws IOException, DatabaseException { fileCacheLatch.acquire(); try { fileCache.clear(); } finally { fileCacheLatch.release(); } endOfLog.close(); } /** * Clear the file lock. */ public void close() throws IOException, DatabaseException { if (envLock != null) { envLock.release(); } if (exclLock != null) { exclLock.release(); } if (channel != null) { channel.close(); } if (lockFile != null) { lockFile.close(); } } /** * Lock the environment. Return true if the lock was acquired. If exclusive * is false, then this implements a single writer, multiple reader lock. If * exclusive is true, then implement an exclusive lock. * * There is a lock file and there are two regions of the lock file: byte 0, * and byte 1. Byte 0 is the exclusive writer process area of the lock file. * If an environment is opened for write, then it attempts to take an * exclusive write lock on byte 0. Byte 1 is the shared reader process area * of the lock file. If an environment is opened for read-only, then it * attempts to take a shared lock on byte 1. This is how we implement single * writer, multi reader semantics. * * The cleaner, each time it is invoked, attempts to take an exclusive lock * on byte 1. The owning process already either has an exclusive lock on * byte 0, or a shared lock on byte 1. This will necessarily conflict with * any shared locks on byte 1, even if it's in the same process and there * are no other holders of that shared lock. 
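	 * For illustration, the byte regions described here map onto the
	 * FileChannel.tryLock() calls made in lockEnvironment() below:
	 * <pre>
	 *     writer process:     channel.tryLock(0, 1, false)  // byte 0, exclusive
	 *     read-only process:  channel.tryLock(1, 2, true)   // byte 1, shared
	 *     cleaner:            channel.tryLock(1, 2, false)  // byte 1, exclusive
	 * </pre>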
	 * So if there is only one read-only process, it will have byte 1 for
	 * shared access, and the cleaner cannot run in it because it will attempt
	 * to get an exclusive lock on byte 1 (which is already locked for shared
	 * access by itself). If a write process comes along and tries to run the
	 * cleaner, it will attempt to get an exclusive lock on byte 1. If there
	 * are no other reader processes (with shared locks on byte 1), and no
	 * other writers (which are running cleaners with exclusive locks on byte
	 * 1), then the cleaner will run.
	 */
	public boolean lockEnvironment(boolean readOnly, boolean exclusive)
			throws DatabaseException {
		try {
			if (checkEnvHomePermissions(readOnly)) {
				return true;
			}
			if (lockFile == null) {
				lockFile = new RandomAccessFile(new File(dbEnvHome, "je"
						+ LOCK_SUFFIX), "rw");
			}
			channel = lockFile.getChannel();
			boolean throwIt = false;
			try {
				if (exclusive) {
					/*
					 * To lock exclusive, must have exclusive on shared reader
					 * area (byte 1).
					 */
					exclLock = channel.tryLock(1, 2, false);
					if (exclLock == null) {
						return false;
					}
					return true;
				} else {
					if (readOnly) {
						envLock = channel.tryLock(1, 2, true);
					} else {
						envLock = channel.tryLock(0, 1, false);
					}
					if (envLock == null) {
						throwIt = true;
					}
				}
			} catch (OverlappingFileLockException e) {
				throwIt = true;
			}
			if (throwIt) {
				throw new LogException("A je" + LOCK_SUFFIX
						+ " file exists in " + dbEnvHome
						+ ". The environment cannot be locked for "
						+ (readOnly ? "shared" : "single writer") + " access.");
			}
		} catch (IOException IOE) {
			throw new LogException(IOE.toString());
		}
		return true;
	}

	public void releaseExclusiveLock() throws DatabaseException {
		try {
			if (exclLock != null) {
				exclLock.release();
			}
		} catch (IOException IOE) {
			throw new DatabaseException(IOE);
		}
	}

	/**
	 * Ensure that if the environment home dir is on readonly media, or is a
	 * readonly directory, the environment has been opened for readonly
	 * access.
	 *
	 * @return true if the environment home dir is readonly.
	 */
	private boolean checkEnvHomePermissions(boolean readOnly)
			throws DatabaseException {
		boolean envDirIsReadOnly = !dbEnvHome.canWrite();
		if (envDirIsReadOnly && !readOnly) {
			throw new DatabaseException("The Environment directory "
					+ dbEnvHome + " is not writable, but the "
					+ "Environment was opened for read-write access.");
		}
		return envDirIsReadOnly;
	}

	/**
	 * Truncate a log at this position. Used by the utilities that recover to
	 * a timestamp and by recovery to set the end-of-log position.
	 *
	 * <p>
	 * This method forces a new log file to be written next, if the last file
	 * (the file truncated to) has an old version in its header. This ensures
	 * that when the log is opened by an old version of JE, a version
	 * incompatibility will be detected. [#11243]
	 * </p>
	 */
	public void truncateLog(long fileNum, long offset) throws IOException,
			DatabaseException {
		FileHandle handle = makeFileHandle(fileNum, FileMode.READWRITE_MODE);
		RandomAccessFile file = handle.getFile();
		try {
			file.getChannel().truncate(offset);
		} finally {
			file.close();
		}
		if (handle.isOldHeaderVersion()) {
			forceNewFile = true;
		}
	}

	/**
	 * Set the flag that causes a new file to be written before the next
	 * write.
	 */
	void forceNewLogFile() {
		forceNewFile = true;
	}

	/**
	 * Return the offset of the first log entry after the file header.
	 *
	 * @return the size in bytes of the file header log entry.
	 */
	public static int firstLogEntryOffset() {
		return FileHeader.entrySize() + LogManager.HEADER_BYTES;
	}

	/**
	 * Return the next available LSN in the log. Note that this is
	 * unsynchronized, so it is only valid as an approximation of log size.
	 */
	public long getNextLsn() {
		return nextAvailableLsn;
	}

	/**
	 * Return the last allocated LSN in the log. Note that this is
	 * unsynchronized, so if it is called outside the log write latch it is
	 * only valid as an approximation of log size.
	 */
	public long getLastUsedLsn() {
		return lastUsedLsn;
	}

	/*
	 * fsync stats.
	 */
	public long getNFSyncs() {
		return syncManager.getNFSyncs();
	}

	public long getNFSyncRequests() {
		return syncManager.getNFSyncRequests();
	}

	public long getNFSyncTimeouts() {
		return syncManager.getNTimeouts();
	}

	void loadStats(StatsConfig config, EnvironmentStats stats)
			throws DatabaseException {
		syncManager.loadStats(config, stats);
	}

	/*
	 * Unit test support
	 */

	/*
	 * @return ids of files in cache
	 */
	Set getCacheKeys() {
		return fileCache.getCacheKeys();
	}

	/**
	 * Clear a file out of the file cache regardless of mode type.
	 */
	private void clearFileCache(long fileNum) throws IOException,
			DatabaseException {
		fileCacheLatch.acquire();
		try {
			fileCache.remove(fileNum);
		} finally {
			fileCacheLatch.release();
		}
	}

	/*
	 * The file cache keeps N RandomAccessFile objects cached for file access.
	 * The cache consists of two parts: a Hashtable that doesn't require extra
	 * synchronization, for the most common access, and a linked list of files
	 * to support cache administration. Looking up a file from the hash table
	 * doesn't require extra latching, but adding or deleting a file does.
	 */
	private static class FileCache {
		private Map fileMap; // Long->file
		private LinkedList fileList; // list of file numbers
		private int fileCacheSize;

		FileCache(DbConfigManager configManager) throws DatabaseException {
			/*
			 * A fileMap maps the file number to FileHandles
			 * (RandomAccessFile, latch). The fileList is a list of Longs to
			 * determine which files to eject out of the file cache if it's
			 * too small.
			 */
			fileMap = new Hashtable();
			fileList = new LinkedList();
			fileCacheSize = configManager
					.getInt(EnvironmentParams.LOG_FILE_CACHE_SIZE);
		}

		private FileHandle get(Long fileId) {
			return (FileHandle) fileMap.get(fileId);
		}

		private void add(Long fileId, FileHandle fileHandle)
				throws DatabaseException {
			/*
			 * Does the cache have any room or do we have to evict? Hunt down
			 * the file list for an unused file. Note that the file cache
			 * might actually grow past the prescribed size if there is
			 * nothing evictable. Should we try to shrink the file cache?
			 * Presently if it grows, it doesn't shrink.
			 */
			if (fileList.size() >= fileCacheSize) {
				Iterator iter = fileList.iterator();
				while (iter.hasNext()) {
					Long evictId = (Long) iter.next();
					FileHandle evictTarget = (FileHandle) fileMap.get(evictId);
					/*
					 * Try to latch. If latchNoWait returns false, then
					 * another thread owns this latch. Note that a thread
					 * that's trying to get a new file handle should never
					 * already own the latch on another file handle, because
					 * these latches are meant to be short lived and only held
					 * over the i/o out of the file.
					 */
					if (evictTarget.latchNoWait()) {
						try {
							fileMap.remove(evictId);
							iter.remove();
							evictTarget.close();
						} catch (IOException e) {
							throw new DatabaseException(e);
						} finally {
							evictTarget.release();
						}
						break;
					}
				}
			}

			/*
			 * We've done our best to evict. Add the file to the cache now,
			 * whether or not we did evict.
			 */
			fileList.add(fileId);
			fileMap.put(fileId, fileHandle);
		}

		/**
		 * Take any file handles corresponding to this file name out of the
		 * cache. A file handle could be there twice, in read-only and in
		 * read/write mode.
		 */
		private void remove(long fileNum) throws IOException,
				DatabaseException {
			Iterator iter = fileList.iterator();
			while (iter.hasNext()) {
				Long evictId = (Long) iter.next();
				if (evictId.longValue() == fileNum) {
					FileHandle evictTarget = (FileHandle) fileMap.get(evictId);
					try {
						evictTarget.latch();
						fileMap.remove(evictId);
						iter.remove();
						evictTarget.close();
					} finally {
						evictTarget.release();
					}
				}
			}
		}

		private void clear() throws IOException, DatabaseException {
			Iterator iter = fileMap.values().iterator();
			while (iter.hasNext()) {
				FileHandle fileHandle = (FileHandle) iter.next();
				try {
					fileHandle.latch();
					fileHandle.close();
					iter.remove();
				} finally {
					fileHandle.release();
				}
			}
			fileMap.clear();
			fileList.clear();
		}

		private Set getCacheKeys() {
			return fileMap.keySet();
		}
	}

	/**
	 * The LogEndFileDescriptor is used to write and fsync the end of the log.
	 * Because the JE log is append only, there is only one logical R/W file
	 * descriptor for the whole environment. This class actually maintains two
	 * RandomAccessFile instances, one for writing and one for fsyncing, so
	 * the two types of operations don't block each other.
	 *
	 * The write file descriptor is considered the master. Manipulation of
	 * this class is done under the log write latch. Here's an explanation of
	 * why the log write latch is sufficient to safeguard all operations.
	 *
	 * There are two types of callers who may use this file descriptor: the
	 * thread that is currently writing to the end of the log and any threads
	 * that are fsyncing on behalf of the FSyncManager.
	 *
	 * The writing thread appends data to the file and fsyncs the file when we
	 * flip over to a new log file. The file descriptor is only instantiated
	 * at the point that it is needed -- which is either when the first fsync
	 * is required by JE or when the log file is full and we flip files.
	 * Therefore, the writing thread has two actions that change this
	 * descriptor -- we initialize the file descriptor for the given log file
	 * at the first write to the file, and we close the file descriptor when
	 * the log file is full. Therefore there is a period when there is no log
	 * descriptor -- when we have not yet written a log buffer into a given
	 * log file.
	 *
	 * The fsyncing threads ask for the log end file descriptor
	 * asynchronously, but will never modify it. These threads may arrive at
	 * the point when the file descriptor is null, and therefore skip their
	 * fsync, but that is fine because it means a writing thread already
	 * flipped that target file and has moved on to the next file.
	 *
	 * Time  Activity
	 * 10    thread 1 writes log entry A into file 0x0, issues fsync outside
	 *       of the log write latch, yields the processor
	 * 20    thread 2 writes log entry B, piggybacks off thread 1
	 * 30    thread 3 writes log entry C, but no room is left in that file, so
	 *       it flips the log and fsyncs file 0x0, all under the log write
	 *       latch. It nulls out endOfLogRWFile and moves on to file 0x1, but
	 *       doesn't create the file yet.
	 * 40    thread 1 finally comes along, but endOfLogRWFile is null -- no
	 *       need to fsync in that case, 0x0 got fsynced.
	 */
	class LogEndFileDescriptor {
		private RandomAccessFile endOfLogRWFile = null;
		private RandomAccessFile endOfLogSyncFile = null;

		/**
		 * getWritableFile must be called under the log write latch.
		 */
		RandomAccessFile getWritableFile(long fileNumber)
				throws RunRecoveryException {
			try {
				if (endOfLogRWFile == null) {
					/*
					 * We need to make a file descriptor for the end of the
					 * log. This is guaranteed to be called under the log
					 * write latch.
*/ endOfLogRWFile = makeFileHandle(fileNumber, FileMode.READWRITE_MODE).getFile(); endOfLogSyncFile = makeFileHandle(fileNumber, FileMode.READWRITE_MODE).getFile(); } return endOfLogRWFile; } catch (Exception e) { /* * If we can't get a write channel, we need to go into * RunRecovery state. */ throw new RunRecoveryException(envImpl, e); } } /** * FSync the log file that makes up the end of the log. */ void force() throws DatabaseException, IOException { /* * Get a local copy of the end of the log file descriptor, it could * change. No need to latch, no harm done if we get an old file * descriptor, because we forcibly fsync under the log write latch * when we switch files. * * If there is no current end file descriptor, we know that the log * file has flipped to a new file since the fsync was issued. */ RandomAccessFile file = endOfLogSyncFile; if (file != null) { FileChannel channel = file.getChannel(); try { channel.force(false); } catch (ClosedChannelException e) { /* * The channel should never be closed. It may be closed * because of an interrupt received by another thread. See * SR [#10463] */ throw new RunRecoveryException(envImpl, "Channel closed, may be due to thread interrupt", e); } assert EnvironmentImpl.maybeForceYield(); } } /** * Close the end of the log file descriptor. Use atomic assignment to * ensure that we won't force and close on the same descriptor. */ void close() throws IOException { IOException firstException = null; if (endOfLogRWFile != null) { RandomAccessFile file = endOfLogRWFile; /* * Null out so that other threads know endOfLogRWFile is no * longer available. */ endOfLogRWFile = null; try { file.close(); } catch (IOException e) { /* Save this exception, so we can try the second close. */ firstException = e; } } if (endOfLogSyncFile != null) { RandomAccessFile file = endOfLogSyncFile; /* * Null out so that other threads know endOfLogSyncFile is no * longer available. */ endOfLogSyncFile = null; file.close(); } if (firstException != null) { throw firstException; } } } /* * Generate IOExceptions for testing. */ /* Testing switch. */ static boolean RUNRECOVERY_EXCEPTION_TESTING = false; /* Max write counter value. */ private static final int RUNRECOVERY_EXCEPTION_MAX = 100; /* Current write counter value. */ private int runRecoveryExceptionCounter = 0; /* Whether an exception has been thrown. */ private boolean runRecoveryExceptionThrown = false; /* Random number generator. */ private Random runRecoveryExceptionRandom = null; private void generateRunRecoveryException(RandomAccessFile file, ByteBuffer data, long destOffset) throws DatabaseException, IOException { if (runRecoveryExceptionThrown) { try { throw new Exception("Write after RunRecoveryException"); } catch (Exception e) { e.printStackTrace(); } } runRecoveryExceptionCounter += 1; if (runRecoveryExceptionCounter >= RUNRECOVERY_EXCEPTION_MAX) { runRecoveryExceptionCounter = 0; } if (runRecoveryExceptionRandom == null) { runRecoveryExceptionRandom = new Random(System.currentTimeMillis()); } if (runRecoveryExceptionCounter == runRecoveryExceptionRandom .nextInt(RUNRECOVERY_EXCEPTION_MAX)) { int len = runRecoveryExceptionRandom.nextInt(data.remaining()); if (len > 0) { byte[] a = new byte[len]; data.get(a, 0, len); ByteBuffer buf = ByteBuffer.wrap(a); writeToFile(file, buf, destOffset); } runRecoveryExceptionThrown = true; throw new RunRecoveryException(envImpl, "Randomly generated for testing"); } } }
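/*
 * Illustrative sketch (not part of the original source): a minimal,
 * self-contained model of two conventions the class above relies on -- the
 * packing of a file number and a file offset into a single LSN, and the
 * NNNNNNNN.jdb file naming handled by getFileName()/getNumFromName(). The
 * 32/32-bit split shown here is an assumption about DbLsn's layout; the class
 * and method names below are hypothetical and exist only for this example.
 */
class FileManagerLsnSketch {

	/* Assumed layout: file number in the high 32 bits, offset in the low 32. */
	static long makeLsn(long fileNum, long offset) {
		return (fileNum << 32) | (offset & 0xFFFFFFFFL);
	}

	static long getFileNumber(long lsn) {
		return lsn >>> 32;
	}

	static long getFileOffset(long lsn) {
		return lsn & 0xFFFFFFFFL;
	}

	/* Mirrors getFileName(): 8 zero-padded hex digits plus a suffix. */
	static String fileName(long fileNum, String suffix) {
		return String.format("%08x%s", fileNum, suffix);
	}

	public static void main(String[] args) {
		long lsn = makeLsn(0x1aL, 0x200L);
		System.out.println("file=" + getFileNumber(lsn)           // 26
				+ " offset=" + getFileOffset(lsn)                  // 512
				+ " name=" + fileName(getFileNumber(lsn), ".jdb")); // 0000001a.jdb
	}
}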